/*
Code to compare foreclosures in the ASMB to those from BlackKnight / McDash
*/

****************************************
***** Merge McDash data on loans at origination to Ryan Sandler's data on foreclosures
****************************************

* Load the loan-month foreclosure panel and keep delinquencies that started in
* 2012 or later (as with the ASMB, focus on loans current within four years).
* NOTE(review): Stata treats missing as larger than any number, so rows with a
* missing delinq_start_yr also satisfy the ">=" tests below -- confirm intended.
use "$SANDLER_DATA", clear
keep if delinq_start_yr >= 2012

* Date helpers: monthly (%tm) version of the daily reporting date, plus display
* formats for the existing date variables.
gen date_monthly = mofd(monthly_reporting_period)
format date_monthly %tm
format delinq_start_mo %tm
format monthly_reporting_period %td
gen year = year(monthly_reporting_period)

* Flag loan-months that are foreclosures (fc==1) inside each ASMB wave's window.
* 2016 ASMB was mailed in August 2016; 2017 ASMB was mailed in July 2017.
gen ASMB_foreclosure_2016 = (fc == 1) & (delinq_start_yr >= 2012) ///
    & inrange(date_monthly, ym(2015,1), ym(2016,8))
gen ASMB_foreclosure_2017 = (fc == 1) & (delinq_start_yr >= 2013) ///
    & inrange(date_monthly, ym(2016,1), ym(2017,7))
gen ASMB_foreclosure = (ASMB_foreclosure_2016 == 1) | (ASMB_foreclosure_2017 == 1)

* One row per loan: (max) turns the loan-month flags into "ever" indicators.
collapse (max) ASMB_foreclosure fc orig_year debt_to_income_ratio, by(loan_identifier)

* Build the merge key under the name loan_id, stored as a string.
rename loan_identifier loan_id
tostring loan_id, replace

* Attach the McDash loan records; discard loans that appear only in McDash.
merge 1:1 loan_id using "$DATA_MCDASH/McDash_Loans"
keep if inlist(_merge, 1, 3)

* The McDash data is about a 40% random sample.  Ryan Sandler's data is a 10%
* sample.  Hence after merging we are left with roughly a 4% total random sample.
tabulate _merge

* Keep matched loans only and store the merged file.
keep if _merge == 3
save "$DATA_OUT/processing/McDash_Loans_and_Foreclosures", replace



****************************************
***** Collapse to get statistics for McDash data
****************************************

* Average loan characteristics in the merged file, by foreclosure status.
use "$DATA_OUT/processing/McDash_Loans_and_Foreclosures", clear

* Recode fc to 2 for loans flagged as foreclosures inside the ASMB windows, so
* that group gets its own row in the collapsed output.
replace fc = 2 if ASMB_foreclosure == 1

* arm: 1 when interest_type is 2 or 3, 0 otherwise (including missing).
gen arm = (interest_type == 2) | (interest_type == 3)

* Constant used to count loans per fc group.
gen one = 1

local mean_vars debt_to_income_ratio orig_year balloon_id arm ///
    original_interest_rate original_term original_loan_amount ///
    original_property_value original_ltv original_credit_score

collapse (mean) `mean_vars' (sum) num_obs = one, by(fc)

save "$DATA_OUT/processing/McDash_foreclosure_stats", replace


******************************************************************************
**** Now get statistics for ASMB foreclosures
********************************************************************************

* Weighted mean characteristics of ASMB respondents who report a foreclosure.
use "$DATA_OUT/NMDB_ASMB_respondents", clear
svyset [pweight = analysis_weight]

* Restrict to the default sample, then to foreclosures.
* NOTE(review): "keep if in_default_sample" keeps every non-zero value, which
* includes missing -- confirm the flag is never missing.
keep if in_default_sample
keep if foreclosure==1

* Collapse to a single row of analysis-weighted means.
collapse (mean) dti balloon_flag  interest arm score_orig_1 income value open_year ltv term loan_amt  [pweight = analysis_weight]

* Use the full variable name score_orig_1 (not the abbreviation score_orig) so
* this line does not depend on variable abbreviation being enabled.
order interest arm score_orig_1 income value open_year ltv

/**********************************************************************************************
Finally compare foreclosures in the two datasets
**********************************************************************************************/

* Build a two-row comparison table (ASMB vs. McDash) from the ASMB means
* currently in memory and the saved McDash statistics.

* Rename ASMB variables to have the same names as in McDash.
* Full variable names are used throughout (score_orig_1, balloon_id) so nothing
* here relies on variable abbreviation being enabled.
rename dti debt_to_income_ratio
rename open_year orig_year
rename balloon_flag balloon_id
rename interest original_interest_rate
rename value original_property_value
rename score_orig_1 original_credit_score
rename ltv original_ltv
rename term original_term
rename loan_amt original_loan_amount

* Stack the McDash rows under the ASMB row and label each row's source.
gen source = "ASMB"
append using "$DATA_OUT/processing/McDash_foreclosure_stats"
replace source = "McDash" if mi(source)
order source

* Keep only the McDash row with fc==2 (foreclosures inside the ASMB windows);
* rows with fc 0 or 1 are dropped. The ASMB row has fc missing and is unaffected.
drop if inlist(fc,0,1)
drop fc income

* Multiply the McDash interest rate and LTV by 100 (ASMB values are left as is).
* NOTE(review): presumably McDash stores these as fractions -- confirm units.
foreach var in original_interest_rate original_ltv {
	replace `var' = 100 * `var' if source=="McDash"
}

* Express the 0/1 indicator means as percentages for both sources.
foreach var in arm balloon_id {
	replace `var' = 100 * `var'
}

* Report dollar amounts in thousands.
foreach var in original_loan_amount original_property_value {
	replace `var' = `var' / 1000
}

* Round every reported statistic to one decimal place.
foreach var in original_credit_score original_ltv debt_to_income_ratio original_interest_rate original_property_value original_loan_amount original_term orig_year arm balloon_id {
	replace `var' = round(`var',.1)
}

* NOTE(review): the table is left in memory; no save/export is visible here.
order source original_credit_score original_ltv debt_to_income_ratio original_interest_rate original_property_value original_loan_amount original_term orig_year arm balloon_id


